import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

def scrape_dytt():
    """Scrape the latest-movie listing from dytt8.net into data/movies.csv.

    Fetches the index page, extracts each movie's title, publish date and
    detail-page URL, follows each detail page for its first download link,
    and writes all collected rows to ``data/movies.csv``.

    A failure to fetch the index page aborts the run with a message; a
    failure on a single movie (network error, missing tags) is reported
    and that movie is skipped, so partial results are still saved.
    """
    base_url = "https://www.dytt8.net/html/gndy/dyzz/index.html"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # 1. Fetch the index page; give up early if it is unreachable.
    try:
        response = requests.get(base_url, headers=headers, timeout=10)
        response.raise_for_status()  # treat HTTP 4xx/5xx as failure, not as parseable HTML
    except requests.RequestException as e:
        print(f"爬取失败: {e}")
        return

    # The site serves legacy Chinese encoding; 'gbk' is a strict superset
    # of gb2312 and avoids decode errors on characters outside gb2312.
    response.encoding = 'gbk'
    soup = BeautifulSoup(response.text, 'html.parser')

    # 2. Extract the movie list; skip malformed rows instead of crashing.
    movies = []
    for item in soup.select('.co_content8 ul table'):
        link = item.find('a')
        if link is None or not link.get('href'):
            continue  # row without a usable link — skip
        title = link.text
        date_tag = item.find('font')
        publish_date = date_tag.text if date_tag is not None else ''
        detail_url = "https://www.dytt8.net" + link['href']

        # 3. Visit the detail page for the download link; a failure here
        # only loses this one movie, not the whole run.
        try:
            detail_response = requests.get(detail_url, headers=headers, timeout=10)
            detail_response.raise_for_status()
        except requests.RequestException as e:
            print(f"爬取失败: {e}")
            continue

        detail_response.encoding = 'gbk'
        detail_soup = BeautifulSoup(detail_response.text, 'html.parser')
        anchor = detail_soup.select_one('#Zoom a')
        download_link = anchor.get('href', '') if anchor is not None else ''

        movies.append({
            "title": title,
            "publish_date": publish_date,
            "download_link": download_link,
        })

        time.sleep(2)  # throttle requests to avoid getting banned

    # 4. Save to CSV, creating the output directory if needed.
    os.makedirs('data', exist_ok=True)
    df = pd.DataFrame(movies)
    df.to_csv('data/movies.csv', index=False)
    print("数据已保存到 data/movies.csv")

# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    scrape_dytt()